clear
set more off
program drop _all
tempfile t
local texsave_settings "replace autonumber nofix"

* All required add-ons are stored in the /packages and /auxiliary folders
adopath ++ "$Wellness_WhatDoesWWDo/scripts/packages"
adopath ++ "$Wellness_WhatDoesWWDo/scripts/auxiliary"
mata: mata mlib index

**************************************************************************************
* Prepare data
**************************************************************************************

tempfile descriptive_stats

use "$Wellness_WhatDoesWWDo/data/proc/wellness_analysis.dta", clear
	
***
* Define variable sets for main analysis: strata vars, survey health vars, survey utilization vars, and claims vars
***

* Core variable sets (correspond to families used for multiple hypothesis testing)
local spending_var "spend_0715_0716"
local weighting_var "covg_0715_0716"

local strata_vars "male age50 age37_49 white salaryQ1 salaryQ2 salaryQ3 faculty AP"

local svy_hvars "everscreen active active_try cursmk othersmk formsmk drink drinkhvy chronic health1 health2 problems energy ehealth overweight badhealth sedentary"
local svy_uvars "druguse physician hospital"
local svy_prodvars "sickdays hrsworked50 jobsatisf1 jobsatisf2 mgmtsafety"

local claims_vars "`spending_var' spendOff_0715_0716 spendHosp_0715_0716 spendRx_0715_0716 nonzero_spend_0715_0716"

local admin_prodvars "sickleave_0815_0716 salary_0616"
local admin_hvars "marathon_2014_2016 gym_0815_0716"

local index_prodvars "prod_index_yr0"

* Aggregated sets (admin followed by survey)
local medical_spending "`claims_vars' `svy_uvars'"
local employment_productivity "`admin_prodvars' `svy_prodvars'"
local health_behaviors "`admin_hvars' `svy_hvars'"

gen constant=1


preserve 



****************************************************************************************************************************************************************************
* Selection tables: strata vars, survey health vars, survey utilization vars, claims vars, productivity vars
****************************************************************************************************************************************************************************

tempfile selection_results
tempfile selection_average

restore

* Selection regressions are only estimated using those assigned to treatment
keep if treat==1

***
* For each completion outcome (screening, HRA, activities), see if there is selection on a variable
***
preserve
local run_no = 1
local replace1 replace

* Loop over each family
foreach group in strata_vars claims_vars svy_uvars admin_prodvars svy_prodvars admin_hvars svy_hvars index_prodvars {

	* domain_weight_list will hold a list of weights to be used by the wyoung command. This will allow regressions within a family of domains to have different weights
	local domain_weight_list ""

	* Loop over each variable in the domain
	qui foreach xvar in ``group'' {

		* Use weights if LHS var is continuous measure of medical spending or sickleave
		if strpos("`xvar'","spend") & !strpos("`xvar'","nonzero") {
			local weights "[aw=`weighting_var']"
			local domain_weight_list "`domain_weight_list' `weighting_var'"
		}
		else if "`xvar'"=="sickleave_0815_0716" {
			local weights "[aw=sickdays_eligible_0815_0716]"
			local domain_weight_list "`domain_weight_list' sickdays_eligible_0815_0716"
		}
		else {
			local weights ""
			local domain_weight_list "`domain_weight_list' constant"
		}

		summ `xvar' `weights', meanonly
		local mean_xvar=r(mean)
		
		local replace replace
		foreach p in hra_c activity_f_c activity_s_c {
		
			* Regression to see if participation measure, on average, is predictive of a variable.
			gen pvar = `p'
			reg `xvar' i.pvar `weights', robust		
			
			* Use different -format- option for spending vars when saving results
			local tblformat "%5.3f"
			if (strpos("`xvar'","spend") | strpos("`xvar'","salary_")) & !strpos("`xvar'","nonzero") local tblformat "%5.1f"
			regsave 1.pvar using `selection_average', t p table("`p'", asterisk(10 5 1) format(`tblformat')) addlabel(outcome,"`xvar'", regressor,"`p'") `replace'
		
			local num_obs = e(N)
			local replace append
			restore, preserve
		}
		
		
		use "`selection_average'", clear

		* Add placeholder for the family-wise p-values (calculated below)
		keep if strpos(var,"_coef") | strpos(var,"_stderr") | strpos(var,"_pval")
		replace var = subinstr(var,"1.pvar","`xvar'",.)
		set obs `=_N+1'
		replace var = "`xvar'_wypval" if _n==_N
		
		gen run_no = `run_no'
		gen mean_sample = `mean_xvar'
		gen num_obs = `num_obs'
		gen outcome = "`xvar'"
		gen group = "`group'"
	
		if `run_no'>1 append using "`selection_results'"
		save "`selection_results'", replace
		
		local run_no = `run_no'+1
		restore, preserve	
	}

	* Calculate and store family-wise p-values for this domain
	foreach p in hra_c activity_f_c activity_s_c {
			
		wyoung ``group'', familyp(`p') bootstraps(10000) seed(11) cmd("_regress OUTCOMEVAR `p' [aw=WEIGHTVAR], robust") weights(`domain_weight_list') replace
		keep outcome p pwyoung
		gen group = "`group'"
		ren p pval_`p'
		
		merge 1:m outcome group using "`selection_results'", assert(match using)

		* Ensure we are merging on right outcome / regressor combination
		destring `p', force gen(tmp)
		assert abs(pval_`p' - tmp)<.1 if strpos(var,"_pval") & _merge==3
		
		tostring pwyoung, replace force format(%5.3f)
		replace `p' = pwyoung if strpos(var,"wypval") & mi(`p')

		drop pwyoung tmp _merge
		save "`selection_results'", replace
		restore, preserve
	}
}
restore

* Sort order: coef, std error, family-wise pval
use "`selection_results'", clear
sort run_no var
save "$Wellness_WhatDoesWWDo/results/intermediate_files/selection_results.dta", replace

use "$Wellness_WhatDoesWWDo/results/intermediate_files/selection_results.dta", clear

foreach v in hra_c activity_f_c activity_s_c {
	replace `v' = "("+`v'+")" if strpos(var,"_stderr")
	replace `v' = "["+`v'+"]" if strpos(var,"_pval")
	replace `v' = "["+`v'+"]" if strpos(var,"_wypval")
}

replace var = "" if strpos(var,"stderr")
replace var = subinstr(var,"_coef","",.)
replace var = "p" if strpos(var,"_pval")
replace var = "" if strpos(var,"_wypval")
drop if var=="p"
drop run_no pval_*
order var mean_sample num_obs
replace num_obs = . if mi(var)
replace mean_sample = . if mi(var)
tostring mean_sample, format(%7.0fc) gen(tmp) force
tostring mean_sample, format(%5.3f) replace force
replace mean_sample = tmp if (inlist(var,"salary_0616") | strpos(var,"spend")) & !strpos(var,"nonzero")
tostring num_obs, format(%7.0fc) replace force
drop tmp

* Label variables and write out the tables
cleanvars var
foreach v of varlist * {
	cap replace `v' = "" if `v'=="."
}

label var var "Selection Variable"
label var mean_sample "Mean"
label var num_obs "\(N\)"
cap label var screening_r "Registered Screening"
label var hra_c "Completed Screening and HRA"
cap label var activity_f_r "Registered Fall Activity" 
label var activity_f_c "Completed Fall Activity"
cap label var activity_s_r "Registered Spring Activity"
label var activity_s_c "Completed Spring Activity"

* Selection table in main text: claims vars, behavior vars, and salary vars. The rest will be in appendix.
gen main_text_vars =  inlist(outcome,"spend_0715_0716","nonzero_spend_0715_0716","salary_0616","salaryQ1","marathon_2014_2016","gym_0815_0716","hrsworked50","sickleave_0815_0716","prod_index_yr0")
drop outcome

preserve
foreach g in strata_vars medical_spending employment_productivity health_behaviors main_text_vars {
		
	* Strata vars
	if "`g'"=="strata_vars" {	
		
		keep if inlist(group,"strata_vars")
		
		local title = "Selection on Strata Variables"
		local options = "size(scriptsize)"
		local appendix appendix_
	}
	
	* Spending measures
	if "`g'"=="medical_spending" {
	
		keep if inlist(group,"claims_vars","svy_uvars")

		replace var = "\ \ \ \ " + var if strpos(var,"Drug s") | strpos(var,"Office s") | strpos(var,"Hospital s") | strpos(var,"Other s")
	
		local title = "Selection on Health Care Utilization Variables"
		local options = "size(scriptsize)"
		local appendix appendix_
	}	
	
	* Employment and productivity measures
	if "`g'"=="employment_productivity" {

		keep if inlist(group,"admin_prodvars","svy_prodvars","index_prodvars")
	
		local title = "Selection on Employment and Productivity Variables"
		local options = "size(scriptsize)"
		local appendix appendix_
	}

	* Health and behaviors
	if "`g'"=="health_behaviors" {
	
		keep if inlist(group,"admin_hvars","svy_hvars")

		local title = "Selection on Health and Behavior Variables"
		local options = "size(tiny)"
		local appendix appendix_
	}
	
	* Main text table: Claims, income, behaviors
	if "`g'"=="main_text_vars" {
		
		keep if main_text_vars

		sortobs, values(4/9 25/27 10/12 16/18 13/15 1/3 19/24)
		
		ingap 1 7 7 22 22
		replace var = "\textbf{A. Baseline Medical Spending}"        in 1 if mi(var)
		replace var = "\textbf{B. Baseline Productivity}"            in 9 if mi(var)		
		replace var = "\textbf{C. Baseline Health Behaviors}"        in 26 if mi(var)		
		
		local title = "Selection on Medical Spending, Productivity, and Health Behaviors"
		local options = "size(scriptsize) hlines(1 9 -6)"
		local appendix
	}	
	
	local fn "Notes: Column (1) reports the mean among subjects assigned to treatment. Columns (3)--(5) report the difference in means between those who completed the participation outcome and those who did not. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level using conventional inference (i.e., not adjusting for multiple outcomes). Family-wise \(p\)-values, reported in brackets, adjust for the number of outcome (selection) variables in each family and are estimated using 10,000 bootstraps."

	drop group main_text_vars
	texsave using "$Wellness_WhatDoesWWDo/results/tables/`appendix'selection_`g'.tex", `texsave_settings' varlabels marker("tab:`appendix'selection_`g'") title("`title'") headerlines("\midrule") footnote("`fn'") `options' 
	restore, preserve
}

restore, not


** EOF
